Load needed packages to use haploreconstruct & haplovalidate
##HAPLORECONSTRUCT
#load functions (+ required packages) ###############################
#setwd("/Users/dagny/Dropbox (PopGen)/haplotypes/")
source("functions/EEC.freq.traj.R")
## Loading required package: plyr
## Warning: package 'plyr' was built under R version 4.1.2
##
## Attaching package: 'plyr'
## The following object is masked from 'package:ggpubr':
##
## mutate
source("functions/EEC.manhattan.plot.R")
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] plyr_1.8.7 ggpubr_0.4.0 ggplot2_3.3.6
## [4] randomcoloR_1.1.0.1 haploReconstruct_0.1.3 haplovalidate_0.1.5
## [7] RColorBrewer_1.1-3 stringr_1.4.0 psych_2.1.9
## [10] data.table_1.14.2
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.9 lattice_0.20-45 tidyr_1.1.4 zoo_1.8-9
## [5] gtools_3.9.2 digest_0.6.29 foreach_1.5.1 utf8_1.2.2
## [9] V8_4.2.2 R6_2.5.1 backports_1.4.1 evaluate_0.14
## [13] pillar_1.8.0 gplots_3.1.1 rlang_1.0.4 curl_4.3.2
## [17] rstudioapi_0.13 car_3.0-12 jquerylib_0.1.4 rmarkdown_2.11
## [21] Rtsne_0.16 igraph_1.2.11 munsell_0.5.0 broom_0.7.10
## [25] compiler_4.1.0 xfun_0.27 pkgconfig_2.0.3 mnormt_2.0.2
## [29] tmvnsim_1.0-2 htmltools_0.5.2 tidyselect_1.1.2 tibble_3.1.8
## [33] codetools_0.2-18 matrixStats_0.61.0 fansi_1.0.3 dplyr_1.0.9
## [37] withr_2.5.0 bitops_1.0-7 grid_4.1.0 nlme_3.1-153
## [41] jsonlite_1.7.2 gtable_0.3.0 lifecycle_1.0.1 magrittr_2.0.3
## [45] scales_1.2.0 KernSmooth_2.23-20 carData_3.0-4 cli_3.3.0
## [49] stringi_1.7.5 cachem_1.0.6 ggsignif_0.6.3 dbscan_1.1-10
## [53] bslib_0.4.0 vctrs_0.4.1 generics_0.1.3 iterators_1.0.13
## [57] tools_4.1.0 glue_1.6.2 purrr_0.3.4 abind_1.4-5
## [61] parallel_4.1.0 fastmap_1.1.0 yaml_2.2.1 colorspace_2.0-3
## [65] cluster_2.1.2 caTools_1.18.2 rstatix_0.7.0 knitr_1.36
## [69] sass_0.4.2
#function for clustering ###############################
perform_clustering<-function(l.freqs, l.min.cl.cor=min.cl.cor, l.min.minor.freq=min.minor.freq, l.max.minor.freq=max.minor.freq,
l.min.freq.change=min.freq.change,l.min.repl=min.repl,l.min.cl.size=min.cl.size,
l.win.size=win.size,l.ngen=ngen,l.run.id="01",l.chr=c("2","3")){
print("Initialize Time Series ...")
l.markers<-data.frame("chr"=character(),"pos"=numeric(),"cluster"=numeric())
#perform haplotype reconstruct for the chromosomes present in your data:
for(l.chr.iter in l.chr){
#format time series data
temp.freqs<-subset(l.freqs,chr==l.chr.iter)
timeSeries<-initialize_SNP_time_series(chr=temp.freqs$chr,pos=temp.freqs$pos,base.freq=temp.freqs$basePops,
lib.freqs=temp.freqs[,grep("L",colnames(temp.freqs)),with=F],pop.ident=rep(c(1:10),l.ngen),
pop.generation = rep(seq(0,l.ngen*10-10,10),each=10),
use.libs = rep(T,10*l.ngen), min.minor.freq=l.min.minor.freq,
max.minor.freq = l.max.minor.freq, winsize = l.win.size, min.lib.frac = 0.75,
minfreqchange = l.min.freq.change, win.scale = "bp", minrepl = l.min.repl)
print(paste("Clustering on chromosome",l.chr.iter,"is running. Please be patient ...",sep=" "))
hbs<-reconstruct_hb(timeSeries,chrom=l.chr.iter, min.cl.size=l.min.cl.size, min.cl.cor=l.min.cl.cor,min.inter=4,single.win=T)
if (number_hbr(hbs)!=0){
n.clusters<-number_hbr(hbs)
for (j in c(1:n.clusters)){
temp.markers<-data.frame("chr"=rep(l.chr.iter,length(markers(hbs,j))),"pos"=markers(hbs,j),"cluster"=rep(j,length(markers(hbs,j))))
l.markers<-rbind(l.markers,temp.markers)
}
}
}
#arrange & format result
l.markers<-arrange(l.markers,chr,pos)
l.markers$id<-paste(l.markers$chr,l.markers$cluster,sep=".")
#save clustering as .rds
toSave<-paste("HaploReconstruct-Min-Cl-Corr",l.min.cl.cor,"-Min-Minor-Freq",l.min.minor.freq,
"-Max-Minor-Freq",l.max.minor.freq,"-Min-Freq-Change",l.min.freq.change,"-Min-Repl",l.min.repl,
"-Min-Cl-Size",l.min.cl.size,"-Win-Size",l.win.size,"Run-ID",l.run.id,".rds",sep="")
saveRDS(hbs,file = toSave)
return(l.markers)
}
obtain_colors<-function(x,y){
n <- length(unique(x$id))
palette <- distinctColorPalette(n)
x$id<-factor(x$id)
x$color<-x$id
levels(x$color)<-palette
z<-merge(x,y,by=c("chr","pos"))
return(z)
}
#data ###############################
#target sites:
targets<-readRDS("data/929_targets_all.rds")
#ACER CMH results:
cmh<-readRDS("data/sim929_acer_cmh.rds")
colnames(cmh)<-c("chr","pos","value")
#sync file:
candSNP<-readRDS("data/sim929_cands.rds")
Explore the data
cmh
## chr pos value
## 1: 2 196428 2.024998
## 2: 2 356282 1.595361
## 3: 2 498592 1.984070
## 4: 2 501458 1.028718
## 5: 2 504144 1.116426
## ---
## 16850: 3 48383675 1.141732
## 16851: 3 48543097 1.364474
## 16852: 3 48586008 2.848238
## 16853: 3 48635159 1.593615
## 16854: 3 48899371 1.105892
#View(cmh)
candSNP
## chr pos ref riseallele fallallele basePops L1 L2
## 1: 2 196428 G G T 0.85921325 0.84210526 0.73913043
## 2: 2 356282 A G A 0.04624277 0.04347826 0.01785714
## 3: 2 498592 C G C 0.10642570 0.14634146 0.12195122
## 4: 2 708369 A A G 0.57319588 0.67500000 0.54098361
## 5: 2 980126 C A C 0.27845528 0.29166667 0.26190476
## ---
## 7082: 3 9904945 G A G 0.01169591 0.03571429 0.02325581
## 7083: 3 11264019 G T G 0.03125000 0.02173913 0.00000000
## 7084: 3 29532659 T T C 0.75104603 0.73134328 0.79069767
## 7085: 3 13611701 A G A 0.37404580 0.30909091 0.26666667
## 7086: 3 10421244 T C T 0.09919028 0.09375000 0.07547170
## L3 L4 L5 L6 L7 L8
## 1: 0.94117647 0.86666667 0.86666667 0.90566038 0.83636364 0.78260870
## 2: 0.01886792 0.07017544 0.06896552 0.04347826 0.07843137 0.01754386
## 3: 0.17307692 0.09090909 0.05000000 0.06250000 0.10416667 0.12962963
## 4: 0.45652174 0.61702128 0.61290323 0.54347826 0.62790698 0.52083333
## 5: 0.21153846 0.46666667 0.20000000 0.31147541 0.17777778 0.35555556
## ---
## 7082: 0.00000000 0.00000000 0.02222222 0.01612903 0.02380952 0.00000000
## 7083: 0.02325581 0.09090909 0.00000000 0.06122449 0.00000000 0.00000000
## 7084: 0.83720930 0.76595745 0.63265306 0.78260870 0.75000000 0.75000000
## 7085: 0.32758621 0.51785714 0.39285714 0.26530612 0.44186047 0.41071429
## 7086: 0.04878049 0.06666667 0.17241379 0.14285714 0.05882353 0.11320755
## L9 L10 L11 L12 L13 L14
## 1: 0.88709677 0.90476190 0.92424242 0.97142857 0.91836735 0.97777778
## 2: 0.03278689 0.08823529 0.03921569 0.09615385 0.10204082 0.08333333
## 3: 0.10909091 0.09090909 0.08955224 0.12962963 0.10909091 0.18333333
## 4: 0.60000000 0.54761905 0.64912281 0.61403509 0.59523810 0.62962963
## 5: 0.21428571 0.23809524 0.25423729 0.33333333 0.37254902 0.34615385
## ---
## 7082: 0.00000000 0.00000000 0.05000000 0.05660377 0.07575758 0.02000000
## 7083: 0.06000000 0.03125000 0.03773585 0.00000000 0.13953488 0.09090909
## 7084: 0.73913043 0.75675676 0.78378378 0.80952381 0.86792453 0.87931034
## 7085: 0.32500000 0.43939394 0.31818182 0.45454545 0.43396226 0.38775510
## 7086: 0.11764706 0.07407407 0.08333333 0.13333333 0.12765957 0.07692308
## L15 L16 L17 L18 L19 L20
## 1: 0.97727273 0.90322581 0.92452830 0.86538462 1.00000000 0.93103448
## 2: 0.12500000 0.07547170 0.00000000 0.09433962 0.15094340 0.06521739
## 3: 0.10869565 0.24444444 0.06818182 0.10000000 0.12765957 0.17647059
## 4: 0.69047619 0.72972973 0.60784314 0.70175439 0.70689655 0.65116279
## 5: 0.40909091 0.19148936 0.28888889 0.41304348 0.34545455 0.36170213
## ---
## 7082: 0.01818182 0.02380952 0.01785714 0.06000000 0.02083333 0.00000000
## 7083: 0.09259259 0.02500000 0.06521739 0.03636364 0.10416667 0.00000000
## 7084: 0.65789474 0.83076923 0.71052632 0.82608696 0.77500000 0.81818182
## 7085: 0.45652174 0.34615385 0.39534884 0.45098039 0.33333333 0.42857143
## 7086: 0.06250000 0.20512821 0.03333333 0.10344828 0.02500000 0.10204082
## L21 L22 L23 L24 L25 L26
## 1: 0.92307692 0.88888889 0.88636364 0.96000000 0.91111111 0.90740741
## 2: 0.11475410 0.12820513 0.08510638 0.09523810 0.24590164 0.14545455
## 3: 0.07142857 0.14285714 0.16949153 0.10204082 0.17777778 0.16981132
## 4: 0.63793103 0.64062500 0.58000000 0.65714286 0.68292683 0.66666667
## 5: 0.23913043 0.31111111 0.37735849 0.30909091 0.34042553 0.30508475
## ---
## 7082: 0.01754386 0.02222222 0.07142857 0.06000000 0.19607843 0.01724138
## 7083: 0.00000000 0.04347826 0.08771930 0.04761905 0.08510638 0.07547170
## 7084: 0.82352941 0.79166667 0.81818182 0.88461538 0.82692308 0.72549020
## 7085: 0.30952381 0.43859649 0.44897959 0.45652174 0.56250000 0.52000000
## 7086: 0.15686275 0.13636364 0.20754717 0.07843137 0.18965517 0.09259259
## L27 L28 L29 L30 L31 L32
## 1: 0.91176471 0.95833333 1.00000000 0.93333333 0.94444444 0.93617021
## 2: 0.04166667 0.06382979 0.07692308 0.04651163 0.17948718 0.04918033
## 3: 0.14285714 0.28070175 0.20370370 0.18181818 0.21739130 0.18604651
## 4: 0.58536585 0.59649123 0.61818182 0.52173913 0.81578947 0.82812500
## 5: 0.41818182 0.44000000 0.37500000 0.35294118 0.36956522 0.32258065
## ---
## 7082: 0.01818182 0.04444444 0.00000000 0.00000000 0.06122449 0.00000000
## 7083: 0.02325581 0.11475410 0.07692308 0.03508772 0.02500000 0.02272727
## 7084: 0.80769231 0.75409836 0.78688525 0.85714286 0.84905660 0.85245902
## 7085: 0.29166667 0.47916667 0.50000000 0.55555556 0.45283019 0.57777778
## 7086: 0.12765957 0.15000000 0.12068966 0.04347826 0.21153846 0.10000000
## L33 L34 L35 L36 L37 L38 L39
## 1: 0.9772727 0.93617021 0.9230769 0.9565217 0.95555556 0.93023256 0.91666667
## 2: 0.1020408 0.04444444 0.1016949 0.1764706 0.07692308 0.11111111 0.12068966
## 3: 0.1190476 0.20338983 0.2000000 0.1000000 0.06818182 0.17307692 0.11764706
## 4: 0.7659574 0.59259259 0.6346154 0.6857143 0.65116279 0.67741935 0.71186441
## 5: 0.4000000 0.51162791 0.4042553 0.4655172 0.28260870 0.32142857 0.24444444
## ---
## 7082: 0.0000000 0.01818182 0.1590909 0.1111111 0.00000000 0.05357143 0.00000000
## 7083: 0.1836735 0.06976744 0.1250000 0.1489362 0.01587302 0.07692308 0.02272727
## 7084: 0.8703704 0.85365854 0.7884615 0.7636364 0.84905660 0.73076923 0.81818182
## 7085: 0.4303797 0.65909091 0.5555556 0.4615385 0.42553191 0.37500000 0.33928571
## 7086: 0.1923077 0.11111111 0.3396226 0.1269841 0.10869565 0.13636364 0.06521739
## L40 L41 L42 L43 L44 L45
## 1: 1.00000000 0.93617021 0.91836735 0.95918367 0.98113208 0.9583333
## 2: 0.07692308 0.05357143 0.05454545 0.15873016 0.06122449 0.1363636
## 3: 0.06451613 0.21276596 0.14814815 0.15517241 0.19444444 0.3589744
## 4: 0.72000000 0.73770492 0.64583333 0.65384615 0.65000000 0.5961538
## 5: 0.35416667 0.34090909 0.28571429 0.39655172 0.38095238 0.3518519
## ---
## 7082: 0.02127660 0.00000000 0.02272727 0.08695652 0.04838710 0.2075472
## 7083: 0.09433962 0.01851852 0.00000000 0.18750000 0.17021277 0.2727273
## 7084: 0.90909091 0.88333333 0.90909091 0.83076923 0.88888889 0.8983051
## 7085: 0.54901961 0.40983607 0.60000000 0.53703704 0.55555556 0.7169811
## 7086: 0.12765957 0.05405405 0.07142857 0.10416667 0.13793103 0.2500000
## L46 L47 L48 L49 L50 L51
## 1: 0.96296296 0.95000000 0.92307692 0.90566038 0.92857143 0.89130435
## 2: 0.06818182 0.12195122 0.15000000 0.06451613 0.05357143 0.13461538
## 3: 0.25000000 0.04878049 0.22222222 0.18518519 0.11864407 0.15000000
## 4: 0.79069767 0.71111111 0.64444444 0.66666667 0.73076923 0.78260870
## 5: 0.34615385 0.28947368 0.56521739 0.42307692 0.35897436 0.55102041
## ---
## 7082: 0.11764706 0.07500000 0.02173913 0.00000000 0.02040816 0.00000000
## 7083: 0.03389831 0.07272727 0.21666667 0.09090909 0.03508772 0.03703704
## 7084: 0.80952381 0.86792453 0.90384615 0.82926829 0.78688525 0.87500000
## 7085: 0.44155844 0.33928571 0.43636364 0.51851852 0.58333333 0.42857143
## 7086: 0.17647059 0.06976744 0.27083333 0.10869565 0.03636364 0.09677419
## L52 L53 L54 L55 L56 L57
## 1: 0.83333333 1.00000000 0.98039216 0.9250000 0.95744681 0.91525424
## 2: 0.04651163 0.06896552 0.11320755 0.1914894 0.07843137 0.08510638
## 3: 0.16666667 0.06666667 0.25490196 0.1777778 0.24528302 0.07142857
## 4: 0.70175439 0.69090909 0.62222222 0.6666667 0.75000000 0.51562500
## 5: 0.26415094 0.39534884 0.34693878 0.3953488 0.34615385 0.43137255
## ---
## 7082: 0.00000000 0.16981132 0.18750000 0.3333333 0.11666667 0.07017544
## 7083: 0.08695652 0.26229508 0.14000000 0.6274510 0.18000000 0.06000000
## 7084: 0.95384615 0.91489362 0.91489362 0.8846154 0.79629630 0.81666667
## 7085: 0.54545455 0.50980392 0.61538462 0.7358491 0.39622642 0.28571429
## 7086: 0.13333333 0.24489796 0.07142857 0.2950820 0.11363636 0.31707317
## L58 L59 L60 L61 L62 L63 L64
## 1: 0.92307692 0.90909091 0.95161290 0.98113208 0.9661017 1.0000000 0.9705882
## 2: 0.10000000 0.22222222 0.02173913 0.20338983 0.0000000 0.1951220 0.1296296
## 3: 0.32203390 0.08888889 0.17500000 0.33333333 0.1891892 0.1914894 0.2241379
## 4: 0.70909091 0.77777778 0.60000000 0.85416667 0.8750000 0.7924528 0.7580645
## 5: 0.32558140 0.36956522 0.64705882 0.31707317 0.2000000 0.3200000 0.5000000
## ---
## 7082: 0.06666667 0.00000000 0.01724138 0.00000000 0.0000000 0.1800000 0.2400000
## 7083: 0.15384615 0.09615385 0.00000000 0.00000000 0.1463415 0.2916667 0.3750000
## 7084: 0.78787879 0.81818182 0.77358491 0.94230769 0.9285714 0.8888889 0.9583333
## 7085: 0.51851852 0.45283019 0.64814815 0.51612903 0.6071429 0.4893617 0.6415094
## 7086: 0.12765957 0.08000000 0.09090909 0.09615385 0.0800000 0.3188406 0.3529412
## L65 L66 L67 L68 L69 L70
## 1: 0.9791667 0.98387097 0.96825397 0.89090909 1.00000000 0.86792453
## 2: 0.3023256 0.15254237 0.05769231 0.09090909 0.15686275 0.04444444
## 3: 0.2407407 0.32432432 0.07547170 0.32692308 0.27906977 0.13333333
## 4: 0.7200000 0.78260870 0.65454545 0.68518519 0.64102564 0.63265306
## 5: 0.4814815 0.44186047 0.58620690 0.45833333 0.54901961 0.48979592
## ---
## 7082: 0.3400000 0.09090909 0.06122449 0.06382979 0.00000000 0.00000000
## 7083: 0.6315789 0.09090909 0.07017544 0.14814815 0.07142857 0.05769231
## 7084: 0.8214286 0.85106383 0.83333333 0.96000000 0.96078431 0.75409836
## 7085: 0.5849057 0.56521739 0.37037037 0.39655172 0.57500000 0.59090909
## 7086: 0.4230769 0.28571429 0.20000000 0.07407407 0.11904762 0.12500000
#View(cands)
cmh is storing a table with 1) chromosome 2) position 3)log10(pval), from CMH. This includes signficant and non-significant snps
candSNP includes 1) chromosome 2) position 3) ref allele 4) rising allele 5) falling allele 6) mean starting/base frequency 7-76) frequencies of each replicate across each time point (sorted by time points). To save some memory, cands is prefiltered, but usually this should have the same number of rows as cmh.
(If needed, how to create a cands file?)
min.minor.freq<-0
max.minor.freq<-1
min.freq.change<-0.15
min.repl<-2
min.cl.size<-20
win.size<-3e+06
ngen<-7
min.cl.cor<-0.6
#actual clustering
stringent.cluster<-perform_clustering(l.freqs = candSNP,l.run.id = "001",l.min.cl.cor = min.cl.cor)
## [1] "Initialize Time Series ..."
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
## of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
## of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not
## recommended because it can result in too noisy correlations.
## Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
## This parameter setting should only be used for exploratory purposes.
## [1] "Clustering on chromosome 2 is running. Please be patient ..."
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
## of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
## of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not
## recommended because it can result in too noisy correlations.
## Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
## This parameter setting should only be used for exploratory purposes.
## [1] "Clustering on chromosome 3 is running. Please be patient ..."
table(stringent.cluster$id)
##
## 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 3.1 3.2 3.3 3.4 3.5 3.6 3.7
## 138 69 20 45 136 111 20 54 38 878 60 31 23 25 52 134
relaxed.cluster<-perform_clustering(l.freqs = candSNP,l.run.id = "002",l.min.cl.cor = 0.2)
## [1] "Initialize Time Series ..."
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
## of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
## of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not
## recommended because it can result in too noisy correlations.
## Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
## This parameter setting should only be used for exploratory purposes.
## [1] "Clustering on chromosome 2 is running. Please be patient ..."
## Warning in validity_reconstruct_hb(object, chrom, min.cl.size, min.cl.cor, : min.cl.cor below 0.4 should not be used!
## Be aware that the used correlation threshold is extremely weak
## and the chance is high that unrelateted things are clustered together!
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
## of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
## of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not
## recommended because it can result in too noisy correlations.
## Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
## This parameter setting should only be used for exploratory purposes.
## [1] "Clustering on chromosome 3 is running. Please be patient ..."
## Warning in validity_reconstruct_hb(object, chrom, min.cl.size, min.cl.cor, : min.cl.cor below 0.4 should not be used!
## Be aware that the used correlation threshold is extremely weak
## and the chance is high that unrelateted things are clustered together!
table(relaxed.cluster$id)
##
## 2.1 2.2 2.3 3.1 3.2 3.3 3.4
## 1212 1032 591 101 2475 1195 26
#plotting:
stringent.cluster<-obtain_colors(stringent.cluster,cmh)
#png("res/demo-stringent.cluster.png",width=750,height=500)
print(get.plot.highlight(data = cmh,l.main="Stringent Clustering",highlight = stringent.cluster,highlight.col = stringent.cluster$color))
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
#dev.off()
#allele frequency trajectories:
#png("res/demo-stringent.cluster.af.png",width=750,height=500)
print(get.af.traj(l.sync=candSNP,l.cluster=stringent.cluster,l.cluster.id = "2.1"))
#dev.off()
relaxed.cluster<-obtain_colors(relaxed.cluster,cmh)
#png("res/demo-relaxed.cluster.png",width = 750,height = 500)
print(get.plot.highlight(data=cmh,l.main = "Relaxed Clustering",highlight = relaxed.cluster,highlight.col = relaxed.cluster$color))
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
#dev.off()
#png("res/demo-relaxed.cluster.af.png",width = 750,height = 500)
print(get.af.traj(l.sync = candSNP,l.cluster = relaxed.cluster,l.cluster.id = "2.1"))
#dev.off()
table(targets$chr)
##
## 2 3
## 7 8
#7 targets on chr 2, 8 on chr 3
ggscatter(data=targets,x="freq",y="s")
#negative, non-linear relationship betweenn selection coefficient and starting allele frequency
#number of blocks:
length(table(stringent.cluster$id))
## [1] 16
length(table(relaxed.cluster$id))
## [1] 7
obtain_length<-function(x){
range_temp<-c()
for(id.iter in unique(x$id)){
temp<-subset(x,id==id.iter)
min_temp<-min(temp$pos)
max_temp<-max(temp$pos)
range_temp<-c(range_temp,max_temp-min_temp)
}
return(range_temp)
}
#SNP number ~ min.cl.cor
marker_SNPs<-data.frame(n=c(table(stringent.cluster$id),table(relaxed.cluster$id)))
marker_SNPs$type<-c(rep("stringent",length(unique(stringent.cluster$id))),rep("relaxed",length(unique(relaxed.cluster$id))))
ggboxplot(data=marker_SNPs,x="type",y="n")
#block length ~ min.cl.cor
block_length<-data.frame(length=c(obtain_length(stringent.cluster),obtain_length(relaxed.cluster)))
block_length$type<-c(rep("stringent",length(unique(stringent.cluster$id))),rep("relaxed",length(unique(relaxed.cluster$id))))
ggboxplot(data=block_length,x="type",y="length")
table(targets$pos%in%stringent.cluster$pos)
##
## FALSE TRUE
## 9 6
table(targets$pos%in%relaxed.cluster$pos)
##
## FALSE TRUE
## 3 12
id_stringent<-which(targets$pos%in%stringent.cluster$pos)
pos_stringent<-targets$pos[id_stringent]
table(stringent.cluster$id[stringent.cluster$pos%in%pos_stringent])
##
## 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 3.1 3.2 3.3 3.4 3.5 3.6 3.7
## 1 0 0 0 0 0 1 1 0 1 0 0 0 1 0 1
#stringent: blocks without targets
id_relaxed<-which(targets$pos%in%relaxed.cluster$pos)
pos_relaxed<-targets$pos[id_relaxed]
table(relaxed.cluster$id[relaxed.cluster$pos%in%pos_relaxed])
##
## 2.1 2.2 2.3 3.1 3.2 3.3 3.4
## 3 2 2 0 2 2 1
#relaxed: blocks with multiple targets
targets$stringent<-rep("No", length(targets$stringent))
targets$stringent[id_stringent]<-"Yes"
targets$stringent<-as.factor(targets$stringent)
ggscatter(targets,x="freq",y="s",color = "stringent")
#higher s with same freq tend to be caught
relaxed.cluster.small<-perform_clustering(candSNP,l.min.cl.cor = 0.2,l.win.size = 5e+05)
## [1] "Initialize Time Series ..."
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
## of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
## of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not
## recommended because it can result in too noisy correlations.
## Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
## This parameter setting should only be used for exploratory purposes.
## [1] "Clustering on chromosome 2 is running. Please be patient ..."
## Warning in validity_reconstruct_hb(object, chrom, min.cl.size, min.cl.cor, : min.cl.cor below 0.4 should not be used!
## Be aware that the used correlation threshold is extremely weak
## and the chance is high that unrelateted things are clustered together!
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
## of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
## of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not
## recommended because it can result in too noisy correlations.
## Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
## This parameter setting should only be used for exploratory purposes.
## [1] "Clustering on chromosome 3 is running. Please be patient ..."
## Warning in validity_reconstruct_hb(object, chrom, min.cl.size, min.cl.cor, : min.cl.cor below 0.4 should not be used!
## Be aware that the used correlation threshold is extremely weak
## and the chance is high that unrelateted things are clustered together!
relaxed.cluster.big<-perform_clustering(candSNP,l.min.cl.cor = 0.2, l.win.size = 1e+07)
## [1] "Initialize Time Series ..."
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
## of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
## of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not
## recommended because it can result in too noisy correlations.
## Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
## This parameter setting should only be used for exploratory purposes.
## [1] "Clustering on chromosome 2 is running. Please be patient ..."
## Warning in validity_reconstruct_hb(object, chrom, min.cl.size, min.cl.cor, : min.cl.cor below 0.4 should not be used!
## Be aware that the used correlation threshold is extremely weak
## and the chance is high that unrelateted things are clustered together!
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
## of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
## of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not
## recommended because it can result in too noisy correlations.
## Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
## This parameter setting should only be used for exploratory purposes.
## [1] "Clustering on chromosome 3 is running. Please be patient ..."
## Warning in validity_reconstruct_hb(object, chrom, min.cl.size, min.cl.cor, : min.cl.cor below 0.4 should not be used!
## Be aware that the used correlation threshold is extremely weak
## and the chance is high that unrelateted things are clustered together!
r<-obtain_colors(relaxed.cluster.small,cmh)
#png("res/demo-relaxed.cluster.s.png",width = 750,height = 500)
print(get.plot.highlight(data=cmh,l.main = "Relaxed Clustering small",highlight = r,highlight.col = r$color))
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
#dev.off()
#png("res/demo-relaxed.cluster.s.af.png",width = 750,height = 500)
print(get.af.traj(l.sync = candSNP,l.cluster = r,l.cluster.id = "2.1"))
#dev.off()
r<-obtain_colors(relaxed.cluster.big,cmh)
#png("res/demo-relaxed.cluster.b.png",width = 750,height = 500)
print(get.plot.highlight(data=cmh,l.main = "Relaxed Clustering big",highlight = r,highlight.col = r$color))
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
#dev.off()
#png("res/demo-relaxed.cluster.b.af.png",width = 750,height = 500)
print(get.af.traj(l.sync = candSNP,l.cluster = r,l.cluster.id = "2.1"))
#dev.off()
length(table(relaxed.cluster.big$id))
## [1] 3
length(table(relaxed.cluster$id))
## [1] 7
length(table(relaxed.cluster.small$id))
## [1] 19
#more clusters with decreasing window size
stringent.cluster.small<-perform_clustering(candSNP,l.min.cl.cor = 0.6,l.win.size = 5e+05)
## [1] "Initialize Time Series ..."
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
## of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
## of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not
## recommended because it can result in too noisy correlations.
## Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
## This parameter setting should only be used for exploratory purposes.
## [1] "Clustering on chromosome 2 is running. Please be patient ..."
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
## of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
## of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not
## recommended because it can result in too noisy correlations.
## Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
## This parameter setting should only be used for exploratory purposes.
## [1] "Clustering on chromosome 3 is running. Please be patient ..."
stringent.cluster.big<-perform_clustering(candSNP,l.min.cl.cor = 0.6,l.win.size = 1e+07)
## [1] "Initialize Time Series ..."
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
## of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
## of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not
## recommended because it can result in too noisy correlations.
## Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
## This parameter setting should only be used for exploratory purposes.
## [1] "Clustering on chromosome 2 is running. Please be patient ..."
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
## of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
## of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not
## recommended because it can result in too noisy correlations.
## Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
## This parameter setting should only be used for exploratory purposes.
## [1] "Clustering on chromosome 3 is running. Please be patient ..."
r<-obtain_colors(stringent.cluster.big,cmh)
#png("res/demo-stringent.cluster.b.af.png",width = 750,height = 500)
print(get.af.traj(l.sync = candSNP,l.cluster = r,l.cluster.id = "2.1"))
#dev.off()
#png("res/demo-stringent.cluster.b.png",width=750,height=500)
print(get.plot.highlight(data = cmh,l.main="Stringent Clustering big",highlight = r,highlight.col = r$color))
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
#dev.off()
r<-obtain_colors(stringent.cluster.small,cmh)
#png("res/demo-stringent.cluster.s.af.png",width = 750,height = 500)
print(get.af.traj(l.sync = candSNP,l.cluster = r,l.cluster.id = "2.1"))
#dev.off()
#png("res/demo-stringent.cluster.s.png",width=750,height=500)
print(get.plot.highlight(data = cmh,l.main="Stringent Clustering small",highlight = r,highlight.col = r$color))
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
#dev.off()
length(table(stringent.cluster.big$id))
## [1] 16
length(table(stringent.cluster$id))
## [1] 16
length(table(stringent.cluster.small$id))
## [1] 16
Define information of our sample
repl <- 1:10 #replicates id
gens <- seq(0,60,10) #generations sampled
ngen=length(gens) #number of time points sampled
nrep=length(repl) #number of replicates
##parameters for haplovalidate##
# define which columns contain the base population (e.g. the first 5 columns)
base.pops <- c(rep(TRUE, length(repl)),rep(FALSE,length(repl)*(ngen-1)))
#define which columns should be included in the analysis
compare <- rep(TRUE,length(repl)*ngen)
# define which column contains which replicate
pop.ident <- rep(repl,ngen)
#define which column contains which generation
pop.generation <- rep(gens, each=length(repl))
Load simulated data
#define our working directory
wdir="/Users/sduarri/Dropbox (PopGen)/Sara/year2/eecourse/day3-haplotypes/"
#wdir="~/Dropbox (PopGen)/Sara/year2/eecourse/haplotypes/"
setwd(wdir) #note , run directly on terminal :)
#load cmh result and our simulated data
cmh.raw <- readRDS(paste0(wdir, "data/", "sim929_acer_cmh.rds"))
cands.all <- readRDS(paste0(wdir, "data/","sim929_cands.rds"))
cmh is storing a table with 1) chromosome 2) position 3)log10(pval), from CMH. This includes signficant and non-significant snps
Cands includes 1) chromosome 2) position 3) ref allele 4) rising allele 5) falling allele 6) mean starting/base frequency 7-76) frequencies of each replicate across each time point (sorted by time points). To save some memory, cands is prefiltered, but usually this should have the same number of rows as cmh.
(If needed, how to create a cands file?)
#from a sync file
# define which columns of the sync file contain the base population (e.g. the first 5 columns)
#base.pops <- c(rep(TRUE, length(repl)),rep(FALSE,length(repl)*(ngen-1)))
# define which columns should be used to polarize for the rising allele
#list(c(F0Rep1,FNRep1),c(F0Rep2,FNRep2),...))
#polaRise =list()
#for (r in repl){
# polaRise[[r]]=(c(r, (r+nrep*(ngen-1))))
#}
#cands.all <- sync_to_frequencies(syncfile,base.pops=base.pops,header=FALSE,mincov=15,polaRise = polaRise)
It can also be created from a vcf file 1. obtain a AF file, this usually provides ref & alt, and usually tracks af for alt snp (check if that is your case) 2. define median starting frequency for each SNP & store it 3. define median “end” frequency (generation that we want to polarize AFC for) 4a. if AFC end-starting (median frequency) is positive, no need to change AF. rising = alt, falling = ref 4b. if AFC is negative change all frequencies for the SNP to 1-AF. rising = ref, falling = alt
Get significant snps, and their AF
cmh <- cmh.raw[score>1.3,]
cands= merge(cmh[,.(chr,pos)],cands.all,on=.(chr,pos))
Define windows size for each chromosome
parameters <- get.mncs.win(cands,cmh, wins=seq(0.1,10,0.05),0.01)
print(parameters)
## chr win
## 1: 2 2.75
## 2: 3 4.05
Run haplovalidate!
start <- Sys.time()
happy <- haplovalidate(cands,cmh,parameters=parameters,pop.ident=pop.ident,pop.generation=pop.generation,base.pops=base.pops,compare=compare,takerandom=2000,filterrange=5000)
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
## of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
## of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not
## recommended because it can result in too noisy correlations.
## Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
## This parameter setting should only be used for exploratory purposes.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
## of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
## of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not
## recommended because it can result in too noisy correlations.
## Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
## This parameter setting should only be used for exploratory purposes.
## [1] "chr 2 cluster corr 0.9"
## [1] "found 0 clusters"
## [1] "chr 2 cluster corr 0.8"
## [1] "found 0 clusters"
## [1] "chr 2 cluster corr 0.7"
## [1] "found 6 clusters"
## [1] "chr 2 cluster corr 0.6"
## [1] "found 8 clusters"
## [1] "chr 2 cluster corr 0.5"
## Warning in validity_reconstruct_hb(object, chrom, min.cl.size, min.cl.cor, :
## min.cl.cor below 0.6 is not recommended.
## [1] "found 15 clusters"
## [1] "chr 2 cluster corr 0.4"
## Warning in validity_reconstruct_hb(object, chrom, min.cl.size, min.cl.cor, :
## min.cl.cor below 0.6 is not recommended.
## [1] "found 14 clusters"
## [1] "chr 2 cluster corr 0.3"
## Warning in validity_reconstruct_hb(object, chrom, min.cl.size, min.cl.cor, : min.cl.cor below 0.4 should not be used!
## Be aware that the used correlation threshold is extremely weak
## and the chance is high that unrelateted things are clustered together!
## [1] "found 8 clusters"
## [1] "chr 3 cluster corr 0.9"
## [1] "found 0 clusters"
## [1] "chr 3 cluster corr 0.8"
## [1] "found 2 clusters"
## [1] "chr 3 cluster corr 0.7"
## [1] "found 7 clusters"
## [1] "chr 3 cluster corr 0.6"
## [1] "found 6 clusters"
## [1] "chr 3 cluster corr 0.5"
## Warning in validity_reconstruct_hb(object, chrom, min.cl.size, min.cl.cor, :
## min.cl.cor below 0.6 is not recommended.
## [1] "found 8 clusters"
## [1] "chr 3 cluster corr 0.4"
## Warning in validity_reconstruct_hb(object, chrom, min.cl.size, min.cl.cor, :
## min.cl.cor below 0.6 is not recommended.
## [1] "found 15 clusters"
## [1] "chr 3 cluster corr 0.3"
## Warning in validity_reconstruct_hb(object, chrom, min.cl.size, min.cl.cor, : min.cl.cor below 0.4 should not be used!
## Be aware that the used correlation threshold is extremely weak
## and the chance is high that unrelateted things are clustered together!
## [1] "found 7 clusters"
## [1] "chr 2 corr 0.5"
## [1] "found 15 clusters"
## [1] "corr 0.5"
## ...............[1] " pval 0.0395 for chr 2"
## [1] "chr 2 corr 0.6"
## [1] "found 8 clusters"
## [1] "corr 0.6"
## ........[1] " pval 0.2527 for chr 2"
## [1] "chr 2 corr 0.7"
## [1] "found 7 clusters"
## [1] "corr 0.7"
## ......[1] " pval 0.0002 for chr 2"
## [1] "chr 2 corr 0.6"
## [1] "found 8 clusters"
## [1] "corr 0.6"
## ........[1] " pval 0.2527 for chr 2"
## [1] "found 7 clusters"
## [1] "found 8 clusters"
## [1] "corr 0.69"
## .......[1] " pval 0 for chr 2"
## [1] "found 7 clusters"
## [1] "found 8 clusters"
## [1] "corr 0.68"
## .......[1] " pval 0 for chr 2"
## [1] "found 8 clusters"
## [1] "found 9 clusters"
## [1] "corr 0.67"
## ........[1] " pval 0.0006 for chr 2"
## [1] "found 7 clusters"
## [1] "found 11 clusters"
## [1] "corr 0.66"
## .......[1] " pval 0.0194 for chr 2"
## [1] "found 7 clusters"
## [1] "found 10 clusters"
## [1] "corr 0.65"
## .......[1] " pval 0.0207 for chr 2"
## [1] "found 8 clusters"
## [1] "found 8 clusters"
## [1] "corr 0.64"
## ........[1] " pval 0.062 for chr 2"
## [1] "Success!"
## [1] "chr 2 done"
## chr corr clust pos tag
## 1: 2 0.64 1 6597257 2_0.64_1
## 2: 2 0.64 1 6597852 2_0.64_1
## 3: 2 0.64 1 6601865 2_0.64_1
## 4: 2 0.64 1 6609701 2_0.64_1
## 5: 2 0.64 1 6612063 2_0.64_1
## ---
## 520: 2 0.64 8 31614678 2_0.64_8
## 521: 2 0.64 8 31618854 2_0.64_8
## 522: 2 0.64 8 31620365 2_0.64_8
## 523: 2 0.64 8 31651886 2_0.64_8
## 524: 2 0.64 8 32253838 2_0.64_8
## [1] "chr 3 corr 0.4"
## [1] "found 15 clusters"
## [1] "corr 0.4"
## ...............[1] " pval 0.4692 for chr 3"
## [1] "chr 3 corr 0.5"
## [1] "found 15 clusters"
## [1] "corr 0.5"
## ........[1] " pval 0.1434 for chr 3"
## [1] "chr 3 corr 0.6"
## [1] "found 8 clusters"
## [1] "corr 0.6"
## ......[1] "chr 3 corr 0.5"
## [1] "found 15 clusters"
## [1] "corr 0.5"
## ........[1] " pval 0.1434 for chr 3"
## [1] "chr 3 corr 0.6"
## [1] "found 8 clusters"
## [1] "corr 0.6"
## ......[1] "chr 3 corr 0.5"
## [1] "found 15 clusters"
## [1] "corr 0.5"
## ........[1] " pval 0.1434 for chr 3"
## [1] "chr 3 corr 0.6"
## [1] "found 8 clusters"
## [1] "corr 0.6"
## ......[1] "chr 3 corr 0.5"
## [1] "found 15 clusters"
## [1] "corr 0.5"
## ........[1] " pval 0.1434 for chr 3"
## [1] "chr 3 corr 0.6"
## [1] "found 8 clusters"
## [1] "corr 0.6"
## ......[1] "chr 3 corr 0.5"
## [1] "found 15 clusters"
## [1] "corr 0.5"
## ........[1] " pval 0.1434 for chr 3"
## [1] "chr 3 corr 0.6"
## [1] "found 8 clusters"
## [1] "corr 0.6"
## ......[1] "chr 3 corr 0.5"
## [1] "found 15 clusters"
## [1] "corr 0.5"
## ........[1] " pval 0.1434 for chr 3"
## [1] "chr 3 corr 0.6"
## [1] "found 8 clusters"
## [1] "corr 0.6"
## ......[1] "chr 3 corr 0.5"
## [1] "found 15 clusters"
## [1] "corr 0.5"
## ........[1] " pval 0.1434 for chr 3"
## [1] "chr 3 corr 0.6"
## [1] "found 8 clusters"
## [1] "corr 0.6"
## ......[1] "chr 3 corr 0.5"
## [1] "found 15 clusters"
## [1] "corr 0.5"
## ........[1] " pval 0.1434 for chr 3"
## [1] "chr 3 corr 0.6"
## [1] "found 8 clusters"
## [1] "corr 0.6"
## ......[1] "chr 3 corr 0.5"
## [1] "found 15 clusters"
## [1] "corr 0.5"
## ........[1] " pval 0.1434 for chr 3"
## [1] "chr 3 corr 0.6"
## [1] "found 8 clusters"
## [1] "corr 0.6"
## ......[1] "chr 3 corr 0.5"
## [1] "found 15 clusters"
## [1] "corr 0.5"
## ........[1] " pval 0.1434 for chr 3"
## [1] "chr 3 done; no threshold!"
#happy <- haplovalidate(cands,cmh,parameters=parameters,takerandom=2000,filterrange=5000, repl = repl, gens = gens)
end <- Sys.time()
#plot dominant blocks (will be stored in your working directory)
plot.haplovalidate(blocks=happy$dominant_haplotypes,cmh,title="sim929",label=T)
## chr pos tag groupcol
## 1: 3 29587701 3_0.5_7_noThreshold #3288BD
## 2: 3 29587148 3_0.5_7_noThreshold #3288BD
## 3: 3 29587623 3_0.5_7_noThreshold #3288BD
## 4: 3 29587119 3_0.5_7_noThreshold #3288BD
## 5: 3 29587549 3_0.5_7_noThreshold #3288BD
## ---
## 2279: 3 11202507 3_0.5_1_noThreshold #D7D5A7
## 2280: 3 10710848 3_0.5_1_noThreshold #D7D5A7
## 2281: 3 11180560 3_0.5_1_noThreshold #D7D5A7
## 2282: 3 10902786 3_0.5_1_noThreshold #D7D5A7
## 2283: 3 10074133 3_0.5_1_noThreshold #D7D5A7
#if needed save the data
saveRDS(happy,paste0(wdir,"results/", "sim929_hapval.rds"))
Check if we detected selected targets
happy_dom <- happy$dominant_haplotypes
#cmh <- readRDS(paste0(wdir, "data/", "sim929_acer_cmh.rds"))
#cands.all <- readRDS(paste0(wdir, "data/","sim929_cands.rds"))
#load selected targets
targets <- readRDS(paste0(wdir, "data/", "929_targets_all.rds"))
happy.cmh <- merge(cmh,happy_dom,by=c("chr","pos"))
targets.cmh <- merge(cmh,targets,by=c("chr","pos"))
#plot
par(mfrow=c(2,1))
for (i in 2:3){
cmh[chr==i,plot(pos,score,pch=19,col="grey")]
happy.cmh[chr==i,points(pos,score,col=factor(tag),pch=19)]
targets.cmh[chr==i,points(pos,score,col="orange",pch=19)]
}